home *** CD-ROM | disk | FTP | other *** search
- /* WIDE AREA INFORMATION SERVER SOFTWARE:
- No guarantees or restrictions. See the readme file for the full standard
- disclaimer. */
-
- /* Include file for the irhash.c file.
- Implements the building functions in irext.h */
-
- #ifndef IREXT_H
- #define IREXT_H
-
-
- /* An interface for adding new server types into the WAIS system.
- * The idea is to use the parsing and bookkeeping operations of the serial
- * indexer, while allowing different inverted file and signature systems
- * to be added as back ends.
- *
- * - Tracy Shen and Brewster 3/91
- */
-
- /*
- * $Log: irext.h,v $
- * Revision 1.24 92/03/20 11:02:16 jonathan
- * Added word_position boolean to add_word. This is a new switch to allow an
- * indexer to ignore the word position info (if it cares), based on
- * indexer parameters.
- *
- * Revision 1.23 92/03/05 07:06:12 shen
- * update init_search_engine prototype to add two more parameters
- * For seeker-ram, the two new parameters are: grow percent and text size
- *
- * Revision 1.22 92/02/29 20:11:57 jonathan
- * Conditionalized definition of DF_INDEPENDENT, etc.
- *
- * Revision 1.21 92/02/13 12:24:25 jonathan
- * conditionalized inclusion of irparse.h on BOOL.
- *
- *
- * Tracy changes:
- * - in function "add_word", add two more parameters, source and date
- * - add a new function "set_query_parameter"
- * proposed changes by brewster:
- * replace date_type with time_t: accepted
- * take out all "unsigned" type modifiers (tracey will concider this)
- * replace short with long: accepted
- * replace int with long (we port to 16 bit machines still): accepted
- * added source to delete_doc_id parameters: accepted
- * Proposed changes by brewster and tracy:
- * if routines are successful return 0, otherwise an error code: accepted
- * Proposed changes by harry:
- * make the dictionary value be any size. This can be done by
- * passing in a size arg or by passing in read and write routines.
- * have a function that says we will not be calling best_hit anymore.
- * proposed changes by brewster:
- * took out hash_pos from add word.
- * change source to a database* db.
- * added finished_best_hit, finished_search_word(db), finished_add_word
- * NOT ACCEPTED proposed by tracy (9/91)
- * create a function init_search_word and pass an array of db's
- * modify search_word dont pass db.
- * modify finish_search_word dont pass db.
- * create init_best_hit pass dbs
- * modify best_hit to pass db (to be modified)
- * modify a hit structure to contain a db
- * create se_init, se_exit (tracy will send these in)
- * create se_open_database, se_close_database (tracy will send these in)
- * ACCEPTED proposed by tracy (9/91)
- * create a function init_search_word and pass db
- * add total word count to db (not set in server side)
- * create init_best_hit
- * change dbs slot query_parameter_type from being a "database** dbs"
- * to "char** srcs" (not done on server side yet)
- * modify best_hit to have argument doc_id, best_character, score
- * create ext_open_database, ext_close_database
- * modify init_add_word to add char* src. (not done in server side)
- * proposed changes by brewster
- * modify best_hit to take both best_character and best_line
- * IMPLEMENTED proposed by brewster
- * specify that search_word take a downcased word
- * create function char *database_file(char *database_name)
- * that will return the name of the file that the database is stored in.
- * on Seeker and Beta this would return "INDEX" always,
- * and serial server this would returns its argument.
- * IMPLEMENTED proposed by brewster
- * the srcs list in set_query_parameter will be what the user
- * passed in the database fields of the Z39.50 request.
- * This means that if multiple src's are specified separate by comma's
- * then it is up to the backend to parse those out.
- * IMPLEMENTED proposed by brewster
- * the src field in init_add_word will always be NULL and
- * the information will be passed via set_query_parameter.
- * APPROVED proposed by brewster
- * add init_search_engine and finished_search_engine
- * this would be called when the server process starts and exists.
- * these functions could check to make sure everything is sane.
- * same arguments in ext_open_database
- * proposed by brewster
- * change scores to doubles rather than longs. maybe weights too.
- * IMPLEMENTED proposed by tracy
- * pass another argument to search_word and add_word:
- * long word_pair.
- * proposed by tracy: APPROVED
- * take out src arg from init_add_word
- * change arg name in search_word from doc_id to relevant_doc_number
- * take out dictionary_value from search_word
- * New arguments to ext_open_database:
- * initialize (same)
- * for_search (if true searches can happen, otherwise can not)
- * for_update (if true updates can happen, otherwise can not)
- * proposed change by tracy
- * add 2 more long arguments to init_search_engine and ext_open_database
- * (for seeker, the first argument should be the percentage of
- * CM memory for signatures. For open_database put in the total
- * raw text size).
- *
- */
-
- #include "cdialect.h"
- #include "irfiles.h" /* for database */
-
- #ifdef BOOL
- #include "irparse.h" /* for boolean searches */
- #endif
-
- #ifdef __cplusplus
- /* declare these as C style functions */
- extern "C"
- {
- #endif /* def __cplusplus */
-
- /* ============================
- * === Control Functions ===
- * ============================*/
-
- /*
- * SE_init - Search Engine initialization function
- *
- * Parameter description:
- * if_update - whether updates are to be performed in this run; the
- * value is True (1L) or False (0L).
- * if_query - whether queries are to be performed in this run; the
- * value is True (1L) or False (0L).
- *
- * Functional description:
- * This should be the first call the FE (front end) makes to the
- * BE (back end) SE (search engine).
- * It gives the SE a chance to initialize its global variables
- * to best serve the FE's requests efficiently.
- * It only needs to be called once each run. Subsequent calls
- * will be ignored.
- * For a batch update (e.g., a waisindex run), if_update should be
- * set to True and if_query to False.
- * For serving queries (e.g., a waisserver run), if_query will be True.
- * To allow on-line update while serving queries, if_update should be
- * set to True, otherwise False.
- * The waisserver has to be able to take on-line update requests and
- * update the search engine's database.
- *
- * Return value: not specified here; presumably 0 on success and an
- * error code otherwise, like the other routines in this file -- TODO confirm.
- */
-
- long SE_init _AP(( long if_update, long if_query));
-
- /*
- * SE_exit - Search Engine exit function
- *
- * Parameter description:
- * None
- *
- * Functional description:
- * This should be the last call the FE (front end) makes to the SE.
- * It gives the SE a chance to flush data kept in buffers, clean up
- * temporary files, and free up resources.
- *
- * Return value: not specified here; presumably 0 on success and an
- * error code otherwise -- TODO confirm.
- */
-
- long SE_exit _AP(( void));
-
-
-
- long SE_open_database _AP (( database *db,
- long if_initialize,
- long if_update,
- long if_query,
- long *parameter1,
- long *parameter2));
- /*
- * SE_open_database - open a database
- *
- * Parameter description:
- * db - pointer to a database structure. The structure should
- * contain a field "SE_private_tag" of type void *.
- * The SE will fill in this field when the
- * database is open. This is the search engine's pointer
- * to its data structure of the database.
- * if_initialize - whether to initialize this database. If the value is
- * True, the database will be set empty. If
- * one already exists, it will be purged
- * or marked old according to the system maintenance
- * policy employed.
- * if_update - whether updates are to be performed on this database; the
- * value is True (1L) or False (0L).
- * if_query - not described in the original notes; presumably whether
- * queries are to be performed on this database, True (1L)
- * or False (0L), mirroring if_update -- TODO confirm.
- * parameter1 and parameter2 - these are additional info the
- * SE needs from the FE. They are pointers. If
- * an SE does not need extra info, the FE will just pass
- * NULL.
- * For new seeker, parameter1 is the database's max size
- * as a percentage of the full-system-load.
- * CM memory is a limited resource to be shared by
- * multiple databases, and is not efficient
- * in dynamic re-allocation.
- * Seeker requires the FE to tell it the maximum size the
- * database can grow to, so that it can pre-allocate the
- * right amount of processors to the database and will
- * wrap around when it reaches the limit to squeeze out
- * old data.
- * It is specified as the
- * percentage of the full-load CM signature pool.
- * For example, on an 8K CM with small memory, it can
- * hold up to 200 megabytes of raw text data.
- * NOTE(review): the original description breaks off here
- * mid-sentence ("If parameter"); the meaning of parameter2
- * is not documented in this comment.
- *
- */
-
/*
 * SE_close_database - counterpart of SE_open_database.
 * NOTE(review): undocumented in this header. Presumably closes db and
 * lets the SE release whatever it attached to the database when it was
 * opened; the return convention is presumably 0 on success -- confirm
 * against the back-end implementation.
 */
- long SE_close_database _AP (( database *db));
-
/*
 * SE_checkpointing - NOTE(review): undocumented in this header. The name
 * suggests it asks the SE to checkpoint the state of db; the exact
 * semantics and return values must be confirmed against the back end.
 */
- long SE_checkpointing _AP(( database *db));
-
-
-
-
- /* init_search_engine: called when the server or indexer is started up,
- * before any other operations are run.
- *
- * If this is a server starting, then file is the directory of the index.
- * If this is an indexer starting, then file is the index file.
- * NOTE - This routine may be called more than once.
- *
- * initialize, for_search: not described here; presumably the same
- * meaning as in ext_open_database -- TODO confirm.
- * cm_mem_percent, rawtext_size, grow_percent: back-end specific sizing
- * hints. The change log at the top of this file mentions, for seeker,
- * a percentage of CM memory for signatures, a raw text size and a
- * grow percent; confirm the exact meaning with the back end in use.
- *
- * return values: 0 if successful, non-0 if error
- * defined error conditions:
- *
- * -1 insufficient resources
- */
-
- long init_search_engine _AP((char* file,
- boolean initialize,
- boolean for_search,
- long cm_mem_percent,
- long rawtext_size,
- long grow_percent));
-
-
- /* finished_search_engine: called when the server is shut down.
- *
- * return values: 0 if successful, non-0 if error
- * defined error conditions: (none listed)
- */
-
- long finished_search_engine _AP((void));
-
-
- /*
- * ext_open_database: This function will be called on a database before
- * any operations are done on it. It might be called multiple times
- * with the same database before a close is done.
- * db: the database to open.
- * initialize: means that the database should be cleared of all state
- * since it will be rebuilt from scratch.
- * for_search: means that the database will only be used for searching.
- * If this is false, then it can be searched and added to.
- * return values: 0 if successful, non-0 if error
- * defined error conditions: (none listed)
- *
- */
-
- long ext_open_database _AP((database *db,
- boolean initialize,
- boolean for_search));
-
- /*
- * ext_close_database: This function will be called after all operations
- * on this database are done.
- * db: the database to close.
- * return values: 0 if successful, non-0 if error
- * defined error conditions: (none listed)
- */
-
- long ext_close_database _AP((database *db));
-
/*
 * database_file: returns the name of the file that the database named
 * database_name is stored in. Per the change log at the top of this
 * file, Seeker and Beta always return "INDEX", while the serial server
 * returns its argument.
 * NOTE(review): ownership of the returned string is not documented;
 * do not free or modify it without checking the back end.
 */
- char *database_file _AP((char *database_name));
-
- /* ============================
- * === Building Functions ===
- * ============================*/
-
- /* init_add_word add_word... finished_add_word
- * is the sequence for creating an update. When a finished_add_word is done,
- * it is safe (and expected) that the builder will flush things to files.
- * set_query_parameter can be called at any time between documents during an add.
- */
-
- /*
- * init_add_word: called before any calls to add_word. finished_add_word
- * will be called before another init_add_word is called.
- * db is the database that will be added to.
- * parameter1 and parameter2 are implementation specific.
- * return values: 0 if successful, non-0 if error
- * defined error conditions: (none listed)
- *
- */
- long init_add_word _AP ((database *db,
- long parameter1, long parameter2));
-
- /*
- * add_word: add this word to the database.
- * Each argument is described next to its declaration below.
- * word_position lets an indexer ignore the word position info
- * (char_pos / line_pos) when it is not valid, based on indexer
- * parameters (see the change log, revision 1.24).
- * return values: 0 if successful, non-0 if error
- * defined error conditions: (none listed)
- */
- long add_word _AP((
- char *word, /* the word to be indexed, this could be a
- word pair. If NULL there are no more words
- to be indexed */
- long char_pos, /* the position of the start of the
- word */
- long line_pos, /* this is passed for the best
- section calculation */
- long weight, /* how important the word looks
- syntactically (such as is it bold)
- NOT used by signature system */
- long doc_id, /* current document, this will never be 0 */
- time_t date, /* display day of this document, 0 if not known */
- long word_pair, /* 1 if it is, 0 if not */
- database* db, /* database to insert the document */
- boolean word_position /* whether the position is valid or not */
- ));
-
- /*
- * finished_add_word: states that there are no more words to add
- * to this database (db).
- *
- * return values: 0 if successful, non-0 if error
- * defined error conditions: (none listed)
- *
- */
-
- long finished_add_word _AP((database *db));
-
- /* ===============================
- * === Maintenance Functions ===
- * ===============================*/
-
- /*
- * delete_doc_id : delete the document identified by doc_id from db
- * return values: 0, successful
- * -1, document not found
- *
- */
- long delete_doc_id _AP((long doc_id, database *db));
-
- /* =============================
- * === Searching Functions ===
- * =============================*/
-
-
- /*
- * set_query_parameter : set query parameters
- * Sets search attributes such as date factor, document source ids,
- * and maximum number of documents returned in a search (the last
- * one is an important performance factor for signature type systems).
- * The search attributes apply to all coming queries until
- * they are reset by the next set_query_parameter call.
- *
- * return values: 0 if successful, non-0 if error (the prototype
- * returns long)
- *
- * The macros below are the bits of the mask argument; a field of
- * query_parameter_type is only interpreted when its bit is set.
- * Presumably SET_MAX_RETRIEVED_MASK selects max_hit_retrieved,
- * SET_DATE_FACTOR_MASK selects date_factor and SET_SELECT_SOURCE
- * selects num_db/srcs -- TODO confirm against the back end.
- */
- #define SET_MAX_RETRIEVED_MASK 1
- #define SET_DATE_FACTOR_MASK 2
- #define SET_SELECT_SOURCE 4
-
- /* enum literals for date_factor (the date_factor field of
- * query_parameter_type; DF_INDEPENDENT is its default).
- * The definitions are guarded so that an earlier definition of
- * DF_INDEPENDENT is left untouched.
- * NOTE(review): DF_LATER / DF_EARLIER presumably bias results toward
- * later / earlier documents -- confirm with the back end.
- */
- #ifndef DF_INDEPENDENT
- #define DF_INDEPENDENT 1
- #define DF_LATER 2
- #define DF_EARLIER 3
- #endif
-
/*
 * query_parameter_type: search attributes handed to set_query_parameter.
 * A field is only interpreted when its bit is set in that function's
 * mask argument.
 */
- typedef struct {
- long max_hit_retrieved;
- /* max number of hits that can be returned by
- * the search engine. For a signature
- * type system, the default value is 20
- */
- long date_factor; /* one of the DF_* values; default is DF_INDEPENDENT */
- long num_db; /* a value of zero means select all;
- * the default is selecting all
- */
- char **srcs; /* sources to be searched. Per the change log, this is
- * what the user passed in the database fields of the
- * Z39.50 request; comma-separated sources are left for
- * the back end to parse */
- } query_parameter_type;
-
- /*
- * set_query_parameter: set a mode variable for the search engine
- * mask: OR of the SET_* bits naming the fields to apply.
- * parameters: the values to apply (see query_parameter_type).
- * return values: 0 if successful, non-0 if error
- * defined error conditions: (none listed)
- *
- */
-
- long set_query_parameter _AP ((
- long mask,
- query_parameter_type *parameters
- /* fields in the query parameter structure are only
- * interpreted when the corresponding mask bit
- * is set in the mask argument.
- */
- ));
-
-
-
- /*
- * init_search_word: called before any search_word is called in this query.
- * The only operation that occurs after this is search_word.
- * db: the database to be searched.
- * return values: 0 if successful, non-0 if error
- * defined error conditions: (none listed)
- *
- */
-
- long init_search_word _AP((database* db));
-
- /*
- * search_word: searches for a word in the index. It side-effects
- * internal state so that best_hit will return the correct
- * results.
- * Per the change log at the top of this file, the word passed in
- * is expected to be downcased already.
- * return values: 0 if successful, non-0 if error
- * defined error conditions: (none listed)
- *
- */
-
- long search_word
- _AP ((char *word, /* the word to be searched for */
- long char_pos, /* the position of the start of the word */
- long line_pos, /* is this needed? not for signature system */
- long weight, /* how important the word looks syntactically,
- such as is it bold */
- long relevant_doc_number,/* current document, seed words is 0,
- then it increments into the relevant
- document */
- long word_pair, /* 1 if it is, 0 if not */
- database *db
- ));
-
-
- /*
- * finished_search_word: states that there are no more words that will
- * be searched for in db before best_hit is called.
- *
- * return values: 0 if successful, non-0 if error
- * defined error conditions: (none listed)
- *
- */
-
- long finished_search_word _AP((database *db));
-
-
-
- /*
- * init_best_hit: called before any best_hit is called in this query.
- * The only operation that occurs after this is best_hit.
- * db: the database the hits are taken from.
- * return values: 0 if successful, non-0 if error
- * defined error conditions: (none listed)
- *
- */
-
- long init_best_hit _AP((database *db));
-
- /*
- * best_hit : reports a hit for the current query on db through the
- * pointer arguments doc_id, best_character, best_line and score.
- * NOTE(review): presumably each call yields the next-best remaining
- * document until -1 is returned -- confirm with the back end.
- *
- * return values: 0, successful
- * -1, no more documents to return
- * Other values are reserved to signal conditions defined in the future.
- *
- */
- long best_hit _AP ((database* db,long *doc_id, long *best_character,
- long *best_line, long *score));
-
- /*
- * finished_best_hit: states that best_hit will not be called again
- * before the next set of search_words or add_words.
- *
- * return values: 0 if successful, non-0 if error
- * defined error conditions: (none listed)
- *
- */
-
- long finished_best_hit _AP((database* db));
-
- #ifdef __cplusplus
- }
- #endif /* def __cplusplus */
-
- #endif /* ndef IREXT_H */
-